{"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.10.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"colab":{"provenance":[]},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[{"sourceId":8365316,"sourceType":"datasetVersion","datasetId":4972363},{"sourceId":8366178,"sourceType":"datasetVersion","datasetId":4972994},{"sourceId":8366331,"sourceType":"datasetVersion","datasetId":4973122},{"sourceId":8366761,"sourceType":"datasetVersion","datasetId":4973462},{"sourceId":8367381,"sourceType":"datasetVersion","datasetId":4973939},{"sourceId":8367445,"sourceType":"datasetVersion","datasetId":4973985}],"dockerImageVersionId":30699,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"import os\npath=\"/kaggle/input/llms-2\"\nos.chdir(path)\nos.listdir(path)","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Vb_ma3uVxQJm","outputId":"19e4e73b-3695-4a2d-ca55-cc600f049d14","execution":{"iopub.status.busy":"2024-05-09T18:26:07.685062Z","iopub.execute_input":"2024-05-09T18:26:07.685415Z","iopub.status.idle":"2024-05-09T18:26:07.704482Z","shell.execute_reply.started":"2024-05-09T18:26:07.685384Z","shell.execute_reply":"2024-05-09T18:26:07.703479Z"},"trusted":true},"execution_count":1,"outputs":[{"execution_count":1,"output_type":"execute_result","data":{"text/plain":"['LLM.pdf', 'README.md', 'indexer.py', 'rag.py']"},"metadata":{}}]},{"cell_type":"code","source":"! pip install vllm","metadata":{"execution":{"iopub.status.busy":"2024-05-09T18:26:08.032493Z","iopub.execute_input":"2024-05-09T18:26:08.032815Z","iopub.status.idle":"2024-05-09T18:31:04.956046Z","shell.execute_reply.started":"2024-05-09T18:26:08.032788Z","shell.execute_reply":"2024-05-09T18:31:04.955070Z"},"trusted":true},"execution_count":2,"outputs":[{"name":"stdout","text":"Collecting vllm\n  Downloading vllm-0.4.2-cp310-cp310-manylinux1_x86_64.whl.metadata (9.1 kB)\nCollecting cmake>=3.21 (from vllm)\n  Downloading cmake-3.29.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.1 kB)\nRequirement already satisfied: ninja in /opt/conda/lib/python3.10/site-packages (from vllm) (1.11.1.1)\nRequirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from vllm) (5.9.3)\nRequirement already satisfied: sentencepiece in /opt/conda/lib/python3.10/site-packages (from vllm) (0.2.0)\nRequirement already satisfied: numpy in /opt/conda/lib/python3.10/site-packages (from vllm) (1.26.4)\nRequirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from vllm) (2.31.0)\nRequirement already satisfied: py-cpuinfo in /opt/conda/lib/python3.10/site-packages (from vllm) (9.0.0)\nCollecting transformers>=4.40.0 (from vllm)\n  Downloading transformers-4.40.2-py3-none-any.whl.metadata (137 kB)\n\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m138.0/138.0 kB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n\u001b[?25hCollecting tokenizers>=0.19.1 (from vllm)\n  Downloading tokenizers-0.19.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\nRequirement already satisfied: fastapi in /opt/conda/lib/python3.10/site-packages (from vllm) (0.108.0)\nCollecting openai (from vllm)\n  Downloading openai-1.27.0-py3-none-any.whl.metadata (21 kB)\nRequirement already satisfied: uvicorn[standard] in /opt/conda/lib/python3.10/site-packages (from vllm) (0.25.0)\nRequirement already satisfied: pydantic>=2.0 in /opt/conda/lib/python3.10/site-packages (from vllm) (2.5.3)\nRequirement already satisfied: prometheus-client>=0.18.0 in /opt/conda/lib/python3.10/site-packages (from vllm) (0.19.0)\nCollecting prometheus-fastapi-instrumentator>=7.0.0 (from vllm)\n  Downloading prometheus_fastapi_instrumentator-7.0.0-py3-none-any.whl.metadata (13 kB)\nCollecting tiktoken==0.6.0 (from vllm)\n  Downloading tiktoken-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\nCollecting lm-format-enforcer==0.9.8 (from vllm)\n  Downloading lm_format_enforcer-0.9.8-py3-none-any.whl.metadata (14 kB)\nCollecting outlines==0.0.34 (from vllm)\n  Downloading outlines-0.0.34-py3-none-any.whl.metadata (13 kB)\nRequirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from vllm) (4.9.0)\nRequirement already satisfied: filelock>=3.10.4 in /opt/conda/lib/python3.10/site-packages (from vllm) (3.13.1)\nRequirement already satisfied: ray>=2.9 in /opt/conda/lib/python3.10/site-packages (from vllm) (2.9.0)\nRequirement already satisfied: nvidia-ml-py in /opt/conda/lib/python3.10/site-packages (from vllm) (11.495.46)\nCollecting vllm-nccl-cu12<2.19,>=2.18 (from vllm)\n  Downloading vllm_nccl_cu12-2.18.1.0.4.0.tar.gz (6.2 kB)\n  Preparing metadata (setup.py) ... \u001b[?25ldone\n\u001b[?25hCollecting torch==2.3.0 (from vllm)\n  Downloading torch-2.3.0-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)\nCollecting xformers==0.0.26.post1 (from vllm)\n  Downloading xformers-0.0.26.post1-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.0 kB)\nCollecting interegular>=0.3.2 (from lm-format-enforcer==0.9.8->vllm)\n  Downloading interegular-0.3.3-py37-none-any.whl.metadata (3.0 kB)\nRequirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from lm-format-enforcer==0.9.8->vllm) (21.3)\nRequirement already satisfied: pyyaml in /opt/conda/lib/python3.10/site-packages (from lm-format-enforcer==0.9.8->vllm) (6.0.1)\nRequirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from outlines==0.0.34->vllm) (3.1.2)\nCollecting lark (from outlines==0.0.34->vllm)\n  Downloading lark-1.1.9-py3-none-any.whl.metadata (1.9 kB)\nRequirement already satisfied: nest-asyncio in /opt/conda/lib/python3.10/site-packages (from outlines==0.0.34->vllm) (1.5.8)\nRequirement already satisfied: cloudpickle in /opt/conda/lib/python3.10/site-packages (from outlines==0.0.34->vllm) (2.2.1)\nCollecting diskcache (from outlines==0.0.34->vllm)\n  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)\nRequirement already satisfied: scipy in /opt/conda/lib/python3.10/site-packages (from outlines==0.0.34->vllm) (1.11.4)\nRequirement already satisfied: numba in /opt/conda/lib/python3.10/site-packages (from outlines==0.0.34->vllm) (0.58.1)\nRequirement already satisfied: joblib in /opt/conda/lib/python3.10/site-packages (from outlines==0.0.34->vllm) (1.4.0)\nRequirement already satisfied: referencing in /opt/conda/lib/python3.10/site-packages (from outlines==0.0.34->vllm) (0.32.1)\nRequirement already satisfied: jsonschema in /opt/conda/lib/python3.10/site-packages (from outlines==0.0.34->vllm) (4.20.0)\nRequirement already satisfied: regex>=2022.1.18 in /opt/conda/lib/python3.10/site-packages (from tiktoken==0.6.0->vllm) (2023.12.25)\nRequirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch==2.3.0->vllm) (1.12)\nRequirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch==2.3.0->vllm) (3.2.1)\nRequirement already satisfied: fsspec in /opt/conda/lib/python3.10/site-packages (from torch==2.3.0->vllm) (2024.2.0)\nCollecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.3.0->vllm)\n  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\nCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.3.0->vllm)\n  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\nCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.3.0->vllm)\n  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\nCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch==2.3.0->vllm)\n  Downloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\nCollecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.3.0->vllm)\n  Downloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\nCollecting nvidia-cufft-cu12==11.0.2.54 (from torch==2.3.0->vllm)\n  Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\nCollecting nvidia-curand-cu12==10.3.2.106 (from torch==2.3.0->vllm)\n  Downloading nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\nCollecting nvidia-cusolver-cu12==11.4.5.107 (from torch==2.3.0->vllm)\n  Downloading nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\nCollecting nvidia-cusparse-cu12==12.1.0.106 (from torch==2.3.0->vllm)\n  Downloading nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\nCollecting nvidia-nccl-cu12==2.20.5 (from torch==2.3.0->vllm)\n  Downloading nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl.metadata (1.8 kB)\nCollecting nvidia-nvtx-cu12==12.1.105 (from torch==2.3.0->vllm)\n  Downloading nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.7 kB)\nCollecting triton==2.3.0 (from torch==2.3.0->vllm)\n  Downloading triton-2.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)\nCollecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch==2.3.0->vllm)\n  Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\nRequirement already satisfied: starlette<1.0.0,>=0.30.0 in /opt/conda/lib/python3.10/site-packages (from prometheus-fastapi-instrumentator>=7.0.0->vllm) (0.32.0.post1)\nRequirement already satisfied: annotated-types>=0.4.0 in /opt/conda/lib/python3.10/site-packages (from pydantic>=2.0->vllm) (0.6.0)\nRequirement already satisfied: pydantic-core==2.14.6 in /opt/conda/lib/python3.10/site-packages (from pydantic>=2.0->vllm) (2.14.6)\nRequirement already satisfied: click>=7.0 in /opt/conda/lib/python3.10/site-packages (from ray>=2.9->vllm) (8.1.7)\nRequirement already satisfied: msgpack<2.0.0,>=1.0.0 in /opt/conda/lib/python3.10/site-packages (from ray>=2.9->vllm) (1.0.7)\nRequirement already satisfied: protobuf!=3.19.5,>=3.15.3 in /opt/conda/lib/python3.10/site-packages (from ray>=2.9->vllm) (3.20.3)\nRequirement already satisfied: aiosignal in /opt/conda/lib/python3.10/site-packages (from ray>=2.9->vllm) (1.3.1)\nRequirement already satisfied: frozenlist in /opt/conda/lib/python3.10/site-packages (from ray>=2.9->vllm) (1.4.1)\nRequirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->vllm) (3.3.2)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->vllm) (3.6)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->vllm) (1.26.18)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->vllm) (2024.2.2)\nRequirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /opt/conda/lib/python3.10/site-packages (from tokenizers>=0.19.1->vllm) (0.22.2)\nRequirement already satisfied: safetensors>=0.4.1 in /opt/conda/lib/python3.10/site-packages (from transformers>=4.40.0->vllm) (0.4.3)\nRequirement already satisfied: tqdm>=4.27 in /opt/conda/lib/python3.10/site-packages (from transformers>=4.40.0->vllm) (4.66.1)\nRequirement already satisfied: anyio<5,>=3.5.0 in /opt/conda/lib/python3.10/site-packages (from openai->vllm) (4.2.0)\nRequirement already satisfied: distro<2,>=1.7.0 in /opt/conda/lib/python3.10/site-packages (from openai->vllm) (1.9.0)\nRequirement already satisfied: httpx<1,>=0.23.0 in /opt/conda/lib/python3.10/site-packages (from openai->vllm) (0.27.0)\nRequirement already satisfied: sniffio in /opt/conda/lib/python3.10/site-packages (from openai->vllm) (1.3.0)\nRequirement already satisfied: h11>=0.8 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (0.14.0)\nRequirement already satisfied: httptools>=0.5.0 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (0.6.1)\nRequirement already satisfied: python-dotenv>=0.13 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (1.0.0)\nRequirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (0.19.0)\nRequirement already satisfied: watchfiles>=0.13 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (0.21.0)\nRequirement already satisfied: websockets>=10.4 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (12.0)\nRequirement already satisfied: exceptiongroup>=1.0.2 in /opt/conda/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai->vllm) (1.2.0)\nRequirement already satisfied: httpcore==1.* in /opt/conda/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai->vllm) (1.0.5)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging->lm-format-enforcer==0.9.8->vllm) (3.1.1)\nRequirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->outlines==0.0.34->vllm) (2.1.3)\nRequirement already satisfied: attrs>=22.2.0 in /opt/conda/lib/python3.10/site-packages (from jsonschema->outlines==0.0.34->vllm) (23.2.0)\nRequirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/conda/lib/python3.10/site-packages (from jsonschema->outlines==0.0.34->vllm) (2023.12.1)\nRequirement already satisfied: rpds-py>=0.7.1 in /opt/conda/lib/python3.10/site-packages (from jsonschema->outlines==0.0.34->vllm) (0.16.2)\nRequirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /opt/conda/lib/python3.10/site-packages (from numba->outlines==0.0.34->vllm) (0.41.1)\nRequirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch==2.3.0->vllm) (1.3.0)\nDownloading vllm-0.4.2-cp310-cp310-manylinux1_x86_64.whl (67.7 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.7/67.7 MB\u001b[0m \u001b[31m22.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading lm_format_enforcer-0.9.8-py3-none-any.whl (40 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.9/40.9 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading outlines-0.0.34-py3-none-any.whl (76 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.5/76.5 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading tiktoken-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m63.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading torch-2.3.0-cp310-cp310-manylinux1_x86_64.whl (779.1 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m779.1/779.1 MB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m0:03\u001b[0mm\n\u001b[?25hDownloading xformers-0.0.26.post1-cp310-cp310-manylinux2014_x86_64.whl (222.7 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m222.7/222.7 MB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0mm\n\u001b[?25hDownloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.6/410.6 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:02\u001b[0mm\n\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.1/14.1 MB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m55.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m823.6/823.6 kB\u001b[0m \u001b[31m42.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m731.7/731.7 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.6/121.6 MB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 MB\u001b[0m \u001b[31m26.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.2/124.2 MB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.0/196.0 MB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m176.2/176.2 MB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading triton-2.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (168.1 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m168.1/168.1 MB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading cmake-3.29.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (26.7 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m26.7/26.7 MB\u001b[0m \u001b[31m48.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading prometheus_fastapi_instrumentator-7.0.0-py3-none-any.whl (19 kB)\nDownloading tokenizers-0.19.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m61.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n\u001b[?25hDownloading transformers-4.40.2-py3-none-any.whl (9.0 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.0/9.0 MB\u001b[0m \u001b[31m69.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading openai-1.27.0-py3-none-any.whl (314 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m314.1/314.1 kB\u001b[0m \u001b[31m21.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading interegular-0.3.3-py37-none-any.whl (23 kB)\nDownloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading lark-1.1.9-py3-none-any.whl (111 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.7/111.7 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m54.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hBuilding wheels for collected packages: vllm-nccl-cu12\n  Building wheel for vllm-nccl-cu12 (setup.py) ... \u001b[?25ldone\n\u001b[?25h  Created wheel for vllm-nccl-cu12: filename=vllm_nccl_cu12-2.18.1.0.4.0-py3-none-any.whl size=5419 sha256=56ca3b1b980e45bd5a885d12d268c968634f2331323f6fb7468bc48dbee22b6b\n  Stored in directory: /root/.cache/pip/wheels/d1/28/b5/e99e6ea84b08c0bf19a218d408316e55e02ff725d3616fb79d\nSuccessfully built vllm-nccl-cu12\nInstalling collected packages: vllm-nccl-cu12, triton, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, lark, interegular, diskcache, cmake, tiktoken, nvidia-cusparse-cu12, nvidia-cudnn-cu12, tokenizers, prometheus-fastapi-instrumentator, openai, nvidia-cusolver-cu12, lm-format-enforcer, transformers, torch, xformers, outlines, vllm\n  Attempting uninstall: tokenizers\n    Found existing installation: tokenizers 0.15.2\n    Uninstalling tokenizers-0.15.2:\n      Successfully uninstalled tokenizers-0.15.2\n  Attempting uninstall: transformers\n    Found existing installation: transformers 4.39.3\n    Uninstalling transformers-4.39.3:\n      Successfully uninstalled transformers-4.39.3\n  Attempting uninstall: torch\n    Found existing installation: torch 2.1.2\n    Uninstalling torch-2.1.2:\n      Successfully uninstalled torch-2.1.2\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\nfastai 2.7.14 requires torch<2.3,>=1.10, but you have torch 2.3.0 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed cmake-3.29.2 diskcache-5.6.3 interegular-0.3.3 lark-1.1.9 lm-format-enforcer-0.9.8 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.4.127 nvidia-nvtx-cu12-12.1.105 openai-1.27.0 outlines-0.0.34 prometheus-fastapi-instrumentator-7.0.0 tiktoken-0.6.0 tokenizers-0.19.1 torch-2.3.0 transformers-4.40.2 triton-2.3.0 vllm-0.4.2 vllm-nccl-cu12-2.18.1.0.4.0 xformers-0.0.26.post1\n","output_type":"stream"}]},{"cell_type":"code","source":"! export VLLM_USE_MODELSCOPE=True","metadata":{"execution":{"iopub.status.busy":"2024-05-09T18:31:04.957817Z","iopub.execute_input":"2024-05-09T18:31:04.958118Z","iopub.status.idle":"2024-05-09T18:31:05.929515Z","shell.execute_reply.started":"2024-05-09T18:31:04.958091Z","shell.execute_reply":"2024-05-09T18:31:05.928253Z"},"trusted":true},"execution_count":3,"outputs":[]},{"cell_type":"code","source":"# ! python -m vllm.entrypoints.openai.api_server --model 'qwen/Qwen-7B-Chat-Int4' --trust-remote-code -q gptq --dtype float16 --gpu-memory-utilization 0.8","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"g03abQPAy_rD","outputId":"e66726d2-51c5-4683-8019-fb02761d1ba7","execution":{"iopub.status.busy":"2024-05-09T18:31:05.930952Z","iopub.execute_input":"2024-05-09T18:31:05.931269Z","iopub.status.idle":"2024-05-09T18:31:05.935848Z","shell.execute_reply.started":"2024-05-09T18:31:05.931240Z","shell.execute_reply":"2024-05-09T18:31:05.934915Z"},"trusted":true},"execution_count":4,"outputs":[]},{"cell_type":"code","source":"! pip install langchain_community langchain pypdf rapidocr-onnxruntime modelscope faiss-cpu langchain_openai","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"dDLui07ExtD8","outputId":"da55bce0-4cbd-4454-863e-0c0449565d3a","execution":{"iopub.status.busy":"2024-05-09T18:31:05.937755Z","iopub.execute_input":"2024-05-09T18:31:05.938058Z","iopub.status.idle":"2024-05-09T18:31:46.065547Z","shell.execute_reply.started":"2024-05-09T18:31:05.938034Z","shell.execute_reply":"2024-05-09T18:31:46.064380Z"},"trusted":true},"execution_count":5,"outputs":[{"name":"stdout","text":"Collecting langchain_community\n  Downloading langchain_community-0.0.38-py3-none-any.whl.metadata (8.7 kB)\nCollecting langchain\n  Downloading langchain-0.1.19-py3-none-any.whl.metadata (13 kB)\nRequirement already satisfied: pypdf in /opt/conda/lib/python3.10/site-packages (4.2.0)\nCollecting rapidocr-onnxruntime\n  Downloading rapidocr_onnxruntime-1.3.17-py3-none-any.whl.metadata (1.2 kB)\nCollecting modelscope\n  Downloading modelscope-1.14.0-py3-none-any.whl.metadata (33 kB)\nCollecting faiss-cpu\n  Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\nCollecting langchain_openai\n  Downloading langchain_openai-0.1.6-py3-none-any.whl.metadata (2.5 kB)\nRequirement already satisfied: PyYAML>=5.3 in /opt/conda/lib/python3.10/site-packages (from langchain_community) (6.0.1)\nRequirement already satisfied: SQLAlchemy<3,>=1.4 in /opt/conda/lib/python3.10/site-packages (from langchain_community) (2.0.25)\nRequirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /opt/conda/lib/python3.10/site-packages (from langchain_community) (3.9.1)\nRequirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /opt/conda/lib/python3.10/site-packages (from langchain_community) (0.6.4)\nCollecting langchain-core<0.2.0,>=0.1.52 (from langchain_community)\n  Downloading langchain_core-0.1.52-py3-none-any.whl.metadata (5.9 kB)\nCollecting langsmith<0.2.0,>=0.1.0 (from langchain_community)\n  Downloading langsmith-0.1.56-py3-none-any.whl.metadata (13 kB)\nRequirement already satisfied: numpy<2,>=1 in /opt/conda/lib/python3.10/site-packages (from langchain_community) (1.26.4)\nRequirement already satisfied: requests<3,>=2 in /opt/conda/lib/python3.10/site-packages (from langchain_community) (2.31.0)\nRequirement already satisfied: tenacity<9.0.0,>=8.1.0 in /opt/conda/lib/python3.10/site-packages (from langchain_community) (8.2.3)\nRequirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /opt/conda/lib/python3.10/site-packages (from langchain) (4.0.3)\nCollecting langchain-text-splitters<0.1,>=0.0.1 (from langchain)\n  Downloading langchain_text_splitters-0.0.1-py3-none-any.whl.metadata (2.0 kB)\nRequirement already satisfied: pydantic<3,>=1 in /opt/conda/lib/python3.10/site-packages (from langchain) (2.5.3)\nRequirement already satisfied: typing_extensions>=4.0 in /opt/conda/lib/python3.10/site-packages (from pypdf) (4.9.0)\nRequirement already satisfied: pyclipper>=1.2.0 in /opt/conda/lib/python3.10/site-packages (from rapidocr-onnxruntime) (1.3.0.post5)\nCollecting onnxruntime>=1.7.0 (from rapidocr-onnxruntime)\n  Downloading onnxruntime-1.17.3-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.4 kB)\nRequirement already satisfied: opencv-python>=4.5.1.48 in /opt/conda/lib/python3.10/site-packages (from rapidocr-onnxruntime) (4.9.0.80)\nRequirement already satisfied: six>=1.15.0 in /opt/conda/lib/python3.10/site-packages (from rapidocr-onnxruntime) (1.16.0)\nRequirement already satisfied: Shapely>=1.7.1 in /opt/conda/lib/python3.10/site-packages (from rapidocr-onnxruntime) (1.8.5.post1)\nRequirement already satisfied: Pillow in /opt/conda/lib/python3.10/site-packages (from rapidocr-onnxruntime) (9.5.0)\nCollecting addict (from modelscope)\n  Downloading addict-2.4.0-py3-none-any.whl.metadata (1.0 kB)\nRequirement already satisfied: attrs in /opt/conda/lib/python3.10/site-packages (from modelscope) (23.2.0)\nRequirement already satisfied: datasets<2.19.0,>=2.16.0 in /opt/conda/lib/python3.10/site-packages (from modelscope) (2.18.0)\nCollecting einops (from modelscope)\n  Downloading einops-0.8.0-py3-none-any.whl.metadata (12 kB)\nRequirement already satisfied: filelock>=3.3.0 in /opt/conda/lib/python3.10/site-packages (from modelscope) (3.13.1)\nRequirement already satisfied: gast>=0.2.2 in /opt/conda/lib/python3.10/site-packages (from modelscope) (0.5.4)\nRequirement already satisfied: huggingface-hub in /opt/conda/lib/python3.10/site-packages (from modelscope) (0.22.2)\nCollecting oss2 (from modelscope)\n  Downloading oss2-2.18.5.tar.gz (283 kB)\n\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m283.4/283.4 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25ldone\n\u001b[?25hRequirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from modelscope) (2.1.4)\nRequirement already satisfied: pyarrow!=9.0.0,>=6.0.0 in /opt/conda/lib/python3.10/site-packages (from modelscope) (15.0.2)\nRequirement already satisfied: python-dateutil>=2.1 in /opt/conda/lib/python3.10/site-packages (from modelscope) (2.9.0.post0)\nRequirement already satisfied: scipy in /opt/conda/lib/python3.10/site-packages (from modelscope) (1.11.4)\nRequirement already satisfied: setuptools in /opt/conda/lib/python3.10/site-packages (from modelscope) (69.0.3)\nRequirement already satisfied: simplejson>=3.3.0 in /opt/conda/lib/python3.10/site-packages (from modelscope) (3.19.2)\nRequirement already satisfied: sortedcontainers>=1.5.9 in /opt/conda/lib/python3.10/site-packages (from modelscope) (2.4.0)\nRequirement already satisfied: tqdm>=4.64.0 in /opt/conda/lib/python3.10/site-packages (from modelscope) (4.66.1)\nRequirement already satisfied: urllib3>=1.26 in /opt/conda/lib/python3.10/site-packages (from modelscope) (1.26.18)\nRequirement already satisfied: yapf in /opt/conda/lib/python3.10/site-packages (from modelscope) (0.40.2)\nRequirement already satisfied: openai<2.0.0,>=1.24.0 in /opt/conda/lib/python3.10/site-packages (from langchain_openai) (1.27.0)\nRequirement already satisfied: tiktoken<1,>=0.5.2 in /opt/conda/lib/python3.10/site-packages (from langchain_openai) (0.6.0)\nRequirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (6.0.4)\nRequirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (1.9.3)\nRequirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (1.4.1)\nRequirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (1.3.1)\nRequirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /opt/conda/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain_community) (3.21.1)\nRequirement already satisfied: typing-inspect<1,>=0.4.0 in /opt/conda/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain_community) (0.9.0)\nRequirement already satisfied: pyarrow-hotfix in /opt/conda/lib/python3.10/site-packages (from datasets<2.19.0,>=2.16.0->modelscope) (0.6)\nRequirement already satisfied: dill<0.3.9,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from datasets<2.19.0,>=2.16.0->modelscope) (0.3.8)\nRequirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets<2.19.0,>=2.16.0->modelscope) (3.4.1)\nRequirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets<2.19.0,>=2.16.0->modelscope) (0.70.16)\nRequirement already satisfied: fsspec<=2024.2.0,>=2023.1.0 in /opt/conda/lib/python3.10/site-packages (from fsspec[http]<=2024.2.0,>=2023.1.0->datasets<2.19.0,>=2.16.0->modelscope) (2024.2.0)\nRequirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from datasets<2.19.0,>=2.16.0->modelscope) (21.3)\nRequirement already satisfied: jsonpatch<2.0,>=1.33 in /opt/conda/lib/python3.10/site-packages (from langchain-core<0.2.0,>=0.1.52->langchain_community) (1.33)\nCollecting packaging (from datasets<2.19.0,>=2.16.0->modelscope)\n  Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)\nCollecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.0->langchain_community)\n  Downloading orjson-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (49 kB)\n\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.7/49.7 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hCollecting coloredlogs (from onnxruntime>=1.7.0->rapidocr-onnxruntime)\n  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)\nRequirement already satisfied: flatbuffers in /opt/conda/lib/python3.10/site-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime) (23.5.26)\nRequirement already satisfied: protobuf in /opt/conda/lib/python3.10/site-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime) (3.20.3)\nRequirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from onnxruntime>=1.7.0->rapidocr-onnxruntime) (1.12)\nRequirement already satisfied: anyio<5,>=3.5.0 in /opt/conda/lib/python3.10/site-packages (from openai<2.0.0,>=1.24.0->langchain_openai) (4.2.0)\nRequirement already satisfied: distro<2,>=1.7.0 in /opt/conda/lib/python3.10/site-packages (from openai<2.0.0,>=1.24.0->langchain_openai) (1.9.0)\nRequirement already satisfied: httpx<1,>=0.23.0 in /opt/conda/lib/python3.10/site-packages (from openai<2.0.0,>=1.24.0->langchain_openai) (0.27.0)\nRequirement already satisfied: sniffio in /opt/conda/lib/python3.10/site-packages (from openai<2.0.0,>=1.24.0->langchain_openai) (1.3.0)\nRequirement already satisfied: annotated-types>=0.4.0 in /opt/conda/lib/python3.10/site-packages (from pydantic<3,>=1->langchain) (0.6.0)\nRequirement already satisfied: pydantic-core==2.14.6 in /opt/conda/lib/python3.10/site-packages (from pydantic<3,>=1->langchain) (2.14.6)\nRequirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain_community) (3.3.2)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain_community) (3.6)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain_community) (2024.2.2)\nRequirement already satisfied: greenlet!=0.4.17 in /opt/conda/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain_community) (3.0.3)\nRequirement already satisfied: regex>=2022.1.18 in /opt/conda/lib/python3.10/site-packages (from tiktoken<1,>=0.5.2->langchain_openai) (2023.12.25)\nRequirement already satisfied: crcmod>=1.7 in /opt/conda/lib/python3.10/site-packages (from oss2->modelscope) (1.7)\nRequirement already satisfied: pycryptodome>=3.4.7 in /opt/conda/lib/python3.10/site-packages (from oss2->modelscope) (3.20.0)\nCollecting aliyun-python-sdk-kms>=2.4.1 (from oss2->modelscope)\n  Downloading aliyun_python_sdk_kms-2.16.3-py2.py3-none-any.whl.metadata (1.5 kB)\nCollecting aliyun-python-sdk-core>=2.13.12 (from oss2->modelscope)\n  Downloading aliyun-python-sdk-core-2.15.1.tar.gz (443 kB)\n\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m443.1/443.1 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25ldone\n\u001b[?25hRequirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->modelscope) (2023.3.post1)\nRequirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->modelscope) (2023.4)\nRequirement already satisfied: importlib-metadata>=6.6.0 in /opt/conda/lib/python3.10/site-packages (from yapf->modelscope) (6.11.0)\nRequirement already satisfied: platformdirs>=3.5.1 in /opt/conda/lib/python3.10/site-packages (from yapf->modelscope) (4.2.0)\nRequirement already satisfied: tomli>=2.0.1 in /opt/conda/lib/python3.10/site-packages (from yapf->modelscope) (2.0.1)\nCollecting jmespath<1.0.0,>=0.9.3 (from aliyun-python-sdk-core>=2.13.12->oss2->modelscope)\n  Downloading jmespath-0.10.0-py2.py3-none-any.whl.metadata (8.0 kB)\nRequirement already satisfied: cryptography>=2.6.0 in /opt/conda/lib/python3.10/site-packages (from aliyun-python-sdk-core>=2.13.12->oss2->modelscope) (41.0.7)\nRequirement already satisfied: exceptiongroup>=1.0.2 in /opt/conda/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai<2.0.0,>=1.24.0->langchain_openai) (1.2.0)\nRequirement already satisfied: httpcore==1.* in /opt/conda/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.24.0->langchain_openai) (1.0.5)\nRequirement already satisfied: h11<0.15,>=0.13 in /opt/conda/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.24.0->langchain_openai) (0.14.0)\nRequirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.10/site-packages (from importlib-metadata>=6.6.0->yapf->modelscope) (3.17.0)\nRequirement already satisfied: jsonpointer>=1.9 in /opt/conda/lib/python3.10/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.2.0,>=0.1.52->langchain_community) (2.4)\nRequirement already satisfied: mypy-extensions>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community) (1.0.0)\nCollecting humanfriendly>=9.1 (from coloredlogs->onnxruntime>=1.7.0->rapidocr-onnxruntime)\n  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)\nRequirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->onnxruntime>=1.7.0->rapidocr-onnxruntime) (1.3.0)\nRequirement already satisfied: cffi>=1.12 in /opt/conda/lib/python3.10/site-packages (from cryptography>=2.6.0->aliyun-python-sdk-core>=2.13.12->oss2->modelscope) (1.16.0)\nRequirement already satisfied: pycparser in /opt/conda/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=2.6.0->aliyun-python-sdk-core>=2.13.12->oss2->modelscope) (2.21)\nDownloading langchain_community-0.0.38-py3-none-any.whl (2.0 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m32.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n\u001b[?25hDownloading langchain-0.1.19-py3-none-any.whl (1.0 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m49.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading rapidocr_onnxruntime-1.3.17-py3-none-any.whl (14.9 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.9/14.9 MB\u001b[0m \u001b[31m56.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading modelscope-1.14.0-py3-none-any.whl (5.7 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.7/5.7 MB\u001b[0m \u001b[31m76.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.0/27.0 MB\u001b[0m \u001b[31m47.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0mm0:01\u001b[0mm\n\u001b[?25hDownloading langchain_openai-0.1.6-py3-none-any.whl (34 kB)\nDownloading langchain_core-0.1.52-py3-none-any.whl (302 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.9/302.9 kB\u001b[0m \u001b[31m23.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading langchain_text_splitters-0.0.1-py3-none-any.whl (21 kB)\nDownloading langsmith-0.1.56-py3-none-any.whl (120 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m120.8/120.8 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading onnxruntime-1.17.3-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.8 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m81.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading addict-2.4.0-py3-none-any.whl (3.8 kB)\nDownloading einops-0.8.0-py3-none-any.whl (43 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.2/43.2 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading aliyun_python_sdk_kms-2.16.3-py2.py3-none-any.whl (98 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.1/98.1 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading orjson-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (142 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading packaging-23.2-py3-none-any.whl (53 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading jmespath-0.10.0-py2.py3-none-any.whl (24 kB)\nBuilding wheels for collected packages: oss2, aliyun-python-sdk-core\n  Building wheel for oss2 (setup.py) ... \u001b[?25ldone\n\u001b[?25h  Created wheel for oss2: filename=oss2-2.18.5-py3-none-any.whl size=118146 sha256=14f7049c1b0af8270351371444ce59b0648cd4831d5d9edf99a201b33719d54c\n  Stored in directory: /root/.cache/pip/wheels/c5/ec/94/a908b823ad209d91fb3cb809c0553032e496dd3d36218e4596\n  Building wheel for aliyun-python-sdk-core (setup.py) ... \u001b[?25ldone\n\u001b[?25h  Created wheel for aliyun-python-sdk-core: filename=aliyun_python_sdk_core-2.15.1-py3-none-any.whl size=535325 sha256=4354299ce94e6d33cfb27851385ca487d109545027e585046ba7c79d35b7e1e4\n  Stored in directory: /root/.cache/pip/wheels/69/4b/8e/0a28e00f4cf43b273c18cce083804738d41013e017da922ce4\nSuccessfully built oss2 aliyun-python-sdk-core\nInstalling collected packages: addict, packaging, orjson, jmespath, humanfriendly, faiss-cpu, einops, coloredlogs, onnxruntime, langsmith, aliyun-python-sdk-core, rapidocr-onnxruntime, langchain-core, aliyun-python-sdk-kms, oss2, langchain-text-splitters, langchain_openai, langchain_community, modelscope, langchain\n  Attempting uninstall: packaging\n    Found existing installation: packaging 21.3\n    Uninstalling packaging-21.3:\n      Successfully uninstalled packaging-21.3\n  Attempting uninstall: orjson\n    Found existing installation: orjson 3.9.10\n    Uninstalling orjson-3.9.10:\n      Successfully uninstalled orjson-3.9.10\n  Attempting uninstall: jmespath\n    Found existing installation: jmespath 1.0.1\n    Uninstalling jmespath-1.0.1:\n      Successfully uninstalled jmespath-1.0.1\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\ncudf 23.8.0 requires cubinlinker, which is not installed.\ncudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.\ncudf 23.8.0 requires ptxcompiler, which is not installed.\ncuml 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.\ndask-cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.\nkeras-cv 0.8.2 requires keras-core, which is not installed.\nkeras-nlp 0.9.3 requires keras-core, which is not installed.\ntensorflow-decision-forests 1.8.1 requires wurlitzer, which is not installed.\napache-beam 2.46.0 requires dill<0.3.2,>=0.3.1.1, but you have dill 0.3.8 which is incompatible.\napache-beam 2.46.0 requires numpy<1.25.0,>=1.14.3, but you have numpy 1.26.4 which is incompatible.\napache-beam 2.46.0 requires pyarrow<10.0.0,>=3.0.0, but you have pyarrow 15.0.2 which is incompatible.\nboto3 1.26.100 requires botocore<1.30.0,>=1.29.100, but you have botocore 1.34.69 which is incompatible.\ncudf 23.8.0 requires cuda-python<12.0a0,>=11.7.1, but you have cuda-python 12.4.0 which is incompatible.\ncudf 23.8.0 requires pandas<1.6.0dev0,>=1.3, but you have pandas 2.1.4 which is incompatible.\ncudf 23.8.0 requires protobuf<5,>=4.21, but you have protobuf 3.20.3 which is incompatible.\ncudf 23.8.0 requires pyarrow==11.*, but you have pyarrow 15.0.2 which is incompatible.\ncuml 23.8.0 requires dask==2023.7.1, but you have dask 2024.4.1 which is incompatible.\ndask-cuda 23.8.0 requires dask==2023.7.1, but you have dask 2024.4.1 which is incompatible.\ndask-cuda 23.8.0 requires pandas<1.6.0dev0,>=1.3, but you have pandas 2.1.4 which is incompatible.\ndask-cudf 23.8.0 requires dask==2023.7.1, but you have dask 2024.4.1 which is incompatible.\ndask-cudf 23.8.0 requires pandas<1.6.0dev0,>=1.3, but you have pandas 2.1.4 which is incompatible.\ndistributed 2023.7.1 requires dask==2023.7.1, but you have dask 2024.4.1 which is incompatible.\nfastai 2.7.14 requires torch<2.3,>=1.10, but you have torch 2.3.0 which is incompatible.\ngoogle-cloud-bigquery 2.34.4 requires packaging<22.0dev,>=14.3, but you have packaging 23.2 which is incompatible.\njupyterlab 4.1.6 requires jupyter-lsp>=2.0.0, but you have jupyter-lsp 1.5.1 which is incompatible.\njupyterlab-lsp 5.1.0 requires jupyter-lsp>=2.0.0, but you have jupyter-lsp 1.5.1 which is incompatible.\nlibpysal 4.9.2 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.\nmomepy 0.7.0 requires shapely>=2, but you have shapely 1.8.5.post1 which is incompatible.\nosmnx 1.9.2 requires shapely>=2.0, but you have shapely 1.8.5.post1 which is incompatible.\nraft-dask 23.8.0 requires dask==2023.7.1, but you have dask 2024.4.1 which is incompatible.\nspopt 0.6.0 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.\ntensorflow 2.15.0 requires keras<2.16,>=2.15.0, but you have keras 3.2.1 which is incompatible.\nydata-profiling 4.6.4 requires numpy<1.26,>=1.16.0, but you have numpy 1.26.4 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed addict-2.4.0 aliyun-python-sdk-core-2.15.1 aliyun-python-sdk-kms-2.16.3 coloredlogs-15.0.1 einops-0.8.0 faiss-cpu-1.8.0 humanfriendly-10.0 jmespath-0.10.0 langchain-0.1.19 langchain-core-0.1.52 langchain-text-splitters-0.0.1 langchain_community-0.0.38 langchain_openai-0.1.6 langsmith-0.1.56 modelscope-1.14.0 onnxruntime-1.17.3 orjson-3.10.3 oss2-2.18.5 packaging-23.2 rapidocr-onnxruntime-1.3.17\n","output_type":"stream"}]},{"cell_type":"code","source":"! python indexer.py","metadata":{"id":"ND059GqqxqDz","execution":{"iopub.status.busy":"2024-05-09T18:32:00.111894Z","iopub.execute_input":"2024-05-09T18:32:00.112819Z","iopub.status.idle":"2024-05-09T18:32:50.218228Z","shell.execute_reply.started":"2024-05-09T18:32:00.112783Z","shell.execute_reply":"2024-05-09T18:32:50.217197Z"},"trusted":true},"execution_count":6,"outputs":[{"name":"stdout","text":"2024-05-09 18:32:24,616 - modelscope - INFO - PyTorch version 2.3.0 Found.\n2024-05-09 18:32:24,619 - modelscope - INFO - TensorFlow version 2.15.0 Found.\n2024-05-09 18:32:24,620 - modelscope - INFO - Loading ast index from /root/.cache/modelscope/ast_indexer\n2024-05-09 18:32:24,620 - modelscope - INFO - No valid ast index found from /root/.cache/modelscope/ast_indexer, generating ast index from prebuilt!\n2024-05-09 18:32:24,785 - modelscope - INFO - Loading done! Current index file version is 1.14.0, with md5 9b69058f7ef1df5bf58f0d94dff00023 and a total number of 976 components indexed\n2024-05-09 18:32:31,337 - modelscope - WARNING - Model revision not specified, use revision: v1.1.0\nDownloading: 100%|█████████████████████████████| 886/886 [00:00<00:00, 3.96MB/s]\nDownloading: 100%|█████████████████████████| 2.08k/2.08k [00:00<00:00, 9.84MB/s]\nDownloading: 100%|██████████████████████████| 60.7k/60.7k [00:00<00:00, 758kB/s]\nDownloading: 100%|███████████████████████████| 388M/388M [00:05<00:00, 76.3MB/s]\nDownloading: 100%|█████████████████████████| 9.71k/9.71k [00:00<00:00, 32.2MB/s]\nDownloading: 100%|██████████████████████████████| 112/112 [00:00<00:00, 678kB/s]\nDownloading: 100%|█████████████████████████████| 332/332 [00:00<00:00, 1.40MB/s]\nDownloading: 100%|████████████████████████████| 107k/107k [00:00<00:00, 667kB/s]\n2024-05-09 18:32:44,705 - modelscope - INFO - initiate model from /root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base\n2024-05-09 18:32:44,705 - modelscope - INFO - initiate model from location /root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base.\n2024-05-09 18:32:44,707 - modelscope - INFO - initialize model from /root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base\n2024-05-09 18:32:45,864 - modelscope - WARNING - No preprocessor field found in cfg.\n2024-05-09 18:32:45,864 - modelscope - WARNING - No val key and type key found in preprocessor domain of configuration.json file.\n2024-05-09 18:32:45,864 - modelscope - WARNING - Cannot find available config to build preprocessor at mode inference, current config: {'model_dir': '/root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base'}. trying to build by task and model information.\n2024-05-09 18:32:45,975 - modelscope - WARNING - No preprocessor field found in cfg.\n2024-05-09 18:32:45,975 - modelscope - WARNING - No val key and type key found in preprocessor domain of configuration.json file.\n2024-05-09 18:32:45,976 - modelscope - WARNING - Cannot find available config to build preprocessor at mode inference, current config: {'model_dir': '/root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base', 'sequence_length': 128}. trying to build by task and model information.\n/opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:1051: FutureWarning: The `device` argument is deprecated and will be removed in v5 of Transformers.\n  warnings.warn(\nfaiss saved!\n","output_type":"stream"}]},{"cell_type":"code","source":"# ! pip install langchain_openai","metadata":{"execution":{"iopub.status.busy":"2024-05-09T18:32:50.220385Z","iopub.execute_input":"2024-05-09T18:32:50.220694Z","iopub.status.idle":"2024-05-09T18:32:50.225041Z","shell.execute_reply.started":"2024-05-09T18:32:50.220665Z","shell.execute_reply":"2024-05-09T18:32:50.224035Z"},"trusted":true},"execution_count":7,"outputs":[]},{"cell_type":"code","source":"# ! python /kaggle/input/llms-2-2/rag.py","metadata":{"execution":{"iopub.status.busy":"2024-05-09T18:32:50.226461Z","iopub.execute_input":"2024-05-09T18:32:50.226811Z","iopub.status.idle":"2024-05-09T18:32:50.233077Z","shell.execute_reply.started":"2024-05-09T18:32:50.226780Z","shell.execute_reply":"2024-05-09T18:32:50.232216Z"},"trusted":true},"execution_count":8,"outputs":[]},{"cell_type":"code","source":"# ! python rag.py","metadata":{"execution":{"iopub.status.busy":"2024-05-09T18:32:50.234706Z","iopub.execute_input":"2024-05-09T18:32:50.234979Z","iopub.status.idle":"2024-05-09T18:32:50.242145Z","shell.execute_reply.started":"2024-05-09T18:32:50.234957Z","shell.execute_reply":"2024-05-09T18:32:50.241384Z"},"trusted":true},"execution_count":9,"outputs":[]},{"cell_type":"code","source":"import subprocess\n\n# 启动 API 服务器\napi_server_process = subprocess.Popen(\n    [\"python\", \"-m\", \"vllm.entrypoints.openai.api_server\", \"--model\", \"qwen/Qwen-7B-Chat-Int4\", \"--trust-remote-code\", \"-q\", \"gptq\", \"--dtype\", \"float16\", \"--gpu-memory-utilization\", \"0.8\"]\n)","metadata":{"execution":{"iopub.status.busy":"2024-05-09T18:34:48.635782Z","iopub.execute_input":"2024-05-09T18:34:48.636525Z","iopub.status.idle":"2024-05-09T18:34:48.643482Z","shell.execute_reply.started":"2024-05-09T18:34:48.636493Z","shell.execute_reply":"2024-05-09T18:34:48.642244Z"},"trusted":true},"execution_count":13,"outputs":[{"name":"stdout","text":"WARNING 05-09 18:34:53 config.py:205] gptq quantization is not fully optimized yet. The speed can be slower than non-quantized models.\nINFO 05-09 18:34:53 llm_engine.py:100] Initializing an LLM engine (v0.4.2) with config: model='qwen/Qwen-7B-Chat-Int4', speculative_config=None, tokenizer='qwen/Qwen-7B-Chat-Int4', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, disable_custom_all_reduce=False, quantization=gptq, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), seed=0, served_model_name=qwen/Qwen-7B-Chat-Int4)\nWARNING 05-09 18:34:54 tokenizer.py:126] Using a slow tokenizer. This might cause a significant slowdown. Consider using a fast tokenizer instead.\nINFO 05-09 18:34:54 utils.py:660] Found nccl from library /root/.config/vllm/nccl/cu12/libnccl.so.2.18.1\nINFO 05-09 18:34:56 selector.py:69] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.\nINFO 05-09 18:34:56 selector.py:32] Using XFormers backend.\nINFO 05-09 18:34:57 weight_utils.py:199] Using model weights format ['*.safetensors']\nINFO 05-09 18:35:00 model_runner.py:175] Loading model weights took 5.5124 GB\nINFO 05-09 18:35:04 gpu_executor.py:114] # GPU blocks: 558, # CPU blocks: 512\nINFO 05-09 18:35:06 model_runner.py:937] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.\nINFO 05-09 18:35:06 model_runner.py:941] CUDA graphs can take additional 1~3 GiB memory per GPU. If you are running out of memory, consider decreasing `gpu_memory_utilization` or enforcing eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.\nINFO 05-09 18:35:19 model_runner.py:1017] Graph capturing finished in 12 secs.\nWARNING 05-09 18:35:20 tokenizer.py:126] Using a slow tokenizer. This might cause a significant slowdown. Consider using a fast tokenizer instead.\nWARNING 05-09 18:35:20 serving_chat.py:391] No chat template provided. Chat API will not work.\nWARNING 05-09 18:35:21 tokenizer.py:126] Using a slow tokenizer. This might cause a significant slowdown. Consider using a fast tokenizer instead.\n","output_type":"stream"},{"name":"stderr","text":"INFO:     Started server process [398]\nINFO:     Waiting for application startup.\nINFO:     Application startup complete.\nINFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)\n","output_type":"stream"}]},{"cell_type":"code","source":"# 打开一个管道，用于向rag.py发送输入\nwith subprocess.Popen([\"python\", \"/kaggle/input/llms-2-5/rag.py\"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) as rag_process:\n    # 向rag.py发送输入\n    user_input = \"大模型的现状\"\n    stdout, stderr = rag_process.communicate(input=user_input)\n\n# 当rag.py执行完毕后，处理其输出\nprint(\"————————————————————————————————————————————————————————————————————————————————————\")\nprint(\"rag_py—stdout:\",stdout)\nprint(\"————————————————————————————————————————————————————————————————————————————————————\")\nprint(\"rag_py—stderr:\",stderr)\nprint(\"————————————————————————————————————————————————————————————————————————————————————\")","metadata":{"execution":{"iopub.status.busy":"2024-05-09T18:35:28.704928Z","iopub.execute_input":"2024-05-09T18:35:28.705621Z","iopub.status.idle":"2024-05-09T18:35:50.021108Z","shell.execute_reply.started":"2024-05-09T18:35:28.705589Z","shell.execute_reply":"2024-05-09T18:35:50.020114Z"},"trusted":true},"execution_count":14,"outputs":[{"name":"stdout","text":"INFO 05-09 18:35:31 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\n","output_type":"stream"},{"name":"stderr","text":"\nNo chat template is defined for this tokenizer - using a default chat template that implements the ChatML format (without BOS/EOS tokens!). If the default is not appropriate for your model, please set `tokenizer.chat_template` to an appropriate template. See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n\n","output_type":"stream"},{"name":"stdout","text":"INFO 05-09 18:35:39 async_llm_engine.py:529] Received request cmpl-23dcece630bd40168eb8cb46da8be7f8: prompt: \"<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n<|im_start|>user\\n\\nAnswer the question based only on the following context:\\n\\n[Document(page_content='3.5 政策支持： 地方鼓励，中央规范，以科学数据支持大模型开发', metadata={'source': 'LLM.pdf', 'page': 34}), Document(page_content='资料来源：灼识咨询、阿里云，中航证券研究所  1.2 大模型的技术原理：以“大规模预训练 +微调”范式满足多元化需求', metadata={'source': 'LLM.pdf', 'page': 5}), Document(page_content='大 模 型 可 以 快 速 并 大 规 模\\\\n地与云计算 、互 联 网 等 其\\\\n他技术结合 ，广 泛 地 应 用\\\\n在经济的各个 领 域 。\\\\n大 模 型 参 数 超 过 百 亿 级 时 ，', metadata={'source': 'LLM.pdf', 'page': 5}), Document(page_content='\\\\uf070按照功能可分为 NLP大模型 、CV大模型 、科学计算大模型和多模态大模型 。', metadata={'source': 'LLM.pdf', 'page': 6}), Document(page_content='\\\\uf070大模型现状： GPT引领，百模征战。 （1）ChatGPT 加速迭代： 从GPT-1至GPT3.5 跨越4年多时间， ChatGPT 发布仅一年，', metadata={'source': 'LLM.pdf', 'page': 1})]\\n\\nQuestion: 大模型的现状\\n<|im_end|>\\n<|im_start|>assistant\\n\", sampling_params: SamplingParams(n=1, best_of=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.7, top_p=1.0, top_k=-1, min_p=0.0, seed=None, use_beam_search=False, length_penalty=1.0, early_stopping=False, stop=['<|im_end|>'], stop_token_ids=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=7817, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None), prompt_token_ids: [151644, 8948, 198, 2610, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 271, 16141, 279, 3405, 3118, 1172, 389, 279, 2701, 2266, 1447, 58, 7524, 12024, 7495, 1131, 18, 13, 20, 80090, 123, 99560, 100143, 5122, 53599, 108, 23384, 104125, 3837, 100679, 101931, 3837, 23031, 99891, 20074, 100143, 26288, 104949, 100013, 516, 11160, 12854, 2427, 1210, 364, 4086, 44, 15995, 516, 364, 2893, 1210, 220, 18, 19, 38842, 11789, 12024, 7495, 1131, 101111, 89161, 5122, 111294, 99334, 100703, 5373, 102661, 99718, 3837, 15946, 99440, 100719, 105601, 220, 220, 16, 13, 17, 40666, 100, 104949, 105535, 105318, 5122, 23031, 2073, 105483, 98841, 104034, 488, 48934, 47872, 854, 99453, 28330, 101929, 106154, 100354, 516, 11160, 12854, 2427, 1210, 364, 4086, 44, 15995, 516, 364, 2893, 1210, 220, 20, 38842, 11789, 12024, 7495, 1131, 26288, 6567, 44401, 4891, 23781, 26853, 107, 220, 23031, 88940, 104, 220, 94299, 74577, 114, 40666, 100, 54955, 226, 6567, 44401, 1699, 29490, 57218, 107692, 220, 5373, 99524, 8908, 223, 242, 10236, 121, 239, 10236, 255, 231, 34369, 114, 1699, 42411, 99361, 100374, 41175, 80942, 59053, 249, 53599, 108, 95522, 242, 220, 11622, 1699, 18493, 99346, 9370, 101284, 18137, 95, 228, 4891, 253, 253, 59133, 59, 77, 26288, 6567, 44401, 4891, 23781, 26853, 224, 47685, 8908, 114, 227, 32181, 229, 68294, 122, 220, 53356, 10236, 118, 100, 220, 13343, 41175, 516, 11160, 12854, 2427, 1210, 364, 4086, 44, 15995, 516, 364, 2893, 1210, 220, 20, 38842, 11789, 12024, 7495, 44060, 1704, 15, 22, 15, 101892, 98380, 113957, 451, 12567, 26288, 104949, 220, 5373, 19589, 26288, 104949, 220, 5373, 99891, 100768, 26288, 104949, 33108, 42140, 53772, 35243, 26288, 104949, 59133, 516, 11160, 12854, 2427, 1210, 364, 4086, 44, 15995, 516, 364, 2893, 1210, 220, 21, 38842, 11789, 12024, 7495, 44060, 1704, 15, 22, 15, 26288, 104949, 105044, 5122, 479, 2828, 104353, 3837, 99271, 53772, 118811, 1773, 42344, 16, 7552, 15672, 38, 2828, 93883, 94299, 113862, 5122, 220, 45181, 38, 2828, 12, 16, 56137, 38, 2828, 18, 13, 20, 8908, 71933, 99236, 19, 7948, 42140, 20450, 3837, 12853, 38, 2828, 69425, 51827, 99373, 100695, 3837, 516, 11160, 12854, 2427, 1210, 364, 4086, 44, 15995, 516, 364, 2893, 1210, 220, 16, 5410, 2533, 14582, 25, 40666, 100, 104949, 9370, 105044, 198, 151645, 198, 151644, 77091, 198], lora_request: None.\nINFO 05-09 18:35:40 metrics.py:334] Avg prompt throughput: 41.5 tokens/s, Avg generation throughput: 0.1 tokens/s, Running: 1 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 4.3%, CPU KV cache usage: 0.0%\nINFO 05-09 18:35:45 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 44.0 tokens/s, Running: 1 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 6.8%, CPU KV cache usage: 0.0%\nINFO 05-09 18:35:48 async_llm_engine.py:120] Finished request cmpl-23dcece630bd40168eb8cb46da8be7f8.\nINFO:     127.0.0.1:36768 - \"POST /v1/chat/completions HTTP/1.1\" 200 OK\n————————————————————————————————————————————————————————————————————————————————————\nrag_py—stdout: query:大模型现状：GPT引领，百模征战。 ChatGPT 加速迭代： 从GPT-1至GPT3.5 跨越4年多时间， ChatGPT 发布仅一年，大 模 型 参 数 超 过 百 亿 级 时，大 模 型 可 以 快 速 并 大 规 模地与云计算 、互 联 网 等 其他技术结合 ，广 泛 地 应 用 在经济的各个 领 域 。大 模 型 可 以 快 速 并 大 规 模地与云计算 、互 联 网 等 其他技术结合 ，广 泛 地 应 用 在经济的各个 领 域 。大 模 型 可 以 快 速 并 大 规 模地与云计算 、互 联 网 等 其他技术结合 ，广 泛 地 应 用 在经济的各个 领 域 。大 模 型 可 以 快 速 并 大 规 模地与云计算 、互 联 网 等 其他技术结合 ，广 泛 地 应 用 在经济的各个 领 域 。大 模 型 可 以 快 速 并 大 规 模地与云计算 、互 联 网 等 其他技术结合 ，广 泛 地 应 用 在经济的各个 领 域 。\n\n————————————————————————————————————————————————————————————————————————————————————\nrag_py—stderr: 2024-05-09 18:35:32,217 - modelscope - INFO - PyTorch version 2.3.0 Found.\n2024-05-09 18:35:32,220 - modelscope - INFO - TensorFlow version 2.15.0 Found.\n2024-05-09 18:35:32,220 - modelscope - INFO - Loading ast index from /root/.cache/modelscope/ast_indexer\n2024-05-09 18:35:32,367 - modelscope - INFO - Loading done! Current index file version is 1.14.0, with md5 9b69058f7ef1df5bf58f0d94dff00023 and a total number of 976 components indexed\n2024-05-09 18:35:37,234 - modelscope - WARNING - Model revision not specified, use revision: v1.1.0\n2024-05-09 18:35:37,666 - modelscope - INFO - initiate model from /root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base\n2024-05-09 18:35:37,667 - modelscope - INFO - initiate model from location /root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base.\n2024-05-09 18:35:37,668 - modelscope - INFO - initialize model from /root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base\n2024-05-09 18:35:38,842 - modelscope - WARNING - No preprocessor field found in cfg.\n2024-05-09 18:35:38,842 - modelscope - WARNING - No val key and type key found in preprocessor domain of configuration.json file.\n2024-05-09 18:35:38,842 - modelscope - WARNING - Cannot find available config to build preprocessor at mode inference, current config: {'model_dir': '/root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base'}. trying to build by task and model information.\n2024-05-09 18:35:38,897 - modelscope - WARNING - No preprocessor field found in cfg.\n2024-05-09 18:35:38,897 - modelscope - WARNING - No val key and type key found in preprocessor domain of configuration.json file.\n2024-05-09 18:35:38,897 - modelscope - WARNING - Cannot find available config to build preprocessor at mode inference, current config: {'model_dir': '/root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base', 'sequence_length': 128}. trying to build by task and model information.\n/opt/conda/lib/python3.10/site-packages/langchain_core/utils/utils.py:161: UserWarning: WARNING! stop is not default parameter.\n                stop was transferred to model_kwargs.\n                Please confirm that stop is what you intended.\n  warnings.warn(\n/opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:1051: FutureWarning: The `device` argument is deprecated and will be removed in v5 of Transformers.\n  warnings.warn(\n\n————————————————————————————————————————————————————————————————————————————————————\nINFO 05-09 18:35:51 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 26.7 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:36:01 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:36:11 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:36:21 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\n","output_type":"stream"}]},{"cell_type":"code","source":"# 打开一个管道，用于向rag.py发送输入\nwith subprocess.Popen([\"python\", \"/kaggle/input/llms-2-5/rag.py\"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) as rag_process:\n    # 向rag.py发送输入\n    user_input = \"大模型的未来\"\n    stdout, stderr = rag_process.communicate(input=user_input)\n\n# 当rag.py执行完毕后，处理其输出\nprint(\"————————————————————————————————————————————————————————————————————————————————————\")\nprint(\"rag_py—stdout:\",stdout)\nprint(\"————————————————————————————————————————————————————————————————————————————————————\")\nprint(\"rag_py—stderr:\",stderr)\nprint(\"————————————————————————————————————————————————————————————————————————————————————\")","metadata":{"execution":{"iopub.status.busy":"2024-05-09T18:36:27.308718Z","iopub.execute_input":"2024-05-09T18:36:27.309070Z","iopub.status.idle":"2024-05-09T18:36:42.550449Z","shell.execute_reply.started":"2024-05-09T18:36:27.309044Z","shell.execute_reply":"2024-05-09T18:36:42.549513Z"},"trusted":true},"execution_count":15,"outputs":[{"name":"stdout","text":"INFO 05-09 18:36:31 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:36:38 async_llm_engine.py:529] Received request cmpl-e27203f9726240c496ee5d3aa298315d: prompt: \"<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n<|im_start|>user\\n\\nAnswer the question based only on the following context:\\n\\n[Document(page_content='大 模 型 可 以 快 速 并 大 规 模\\\\n地与云计算 、互 联 网 等 其\\\\n他技术结合 ，广 泛 地 应 用\\\\n在经济的各个 领 域 。\\\\n大 模 型 参 数 超 过 百 亿 级 时 ，', metadata={'source': 'LLM.pdf', 'page': 5}), Document(page_content='\\\\uf070大模型未来：应用多点开花，产业智能跃迁。 （1）内容变革： 拥有通用性、基础性多模态、参数多、训练数据量大、生成内容高质稳定等特征的 AI大模型成为了自动化内容生产的“工厂”', metadata={'source': 'LLM.pdf', 'page': 1}), Document(page_content='3.5 政策支持： 地方鼓励，中央规范，以科学数据支持大模型开发', metadata={'source': 'LLM.pdf', 'page': 34}), Document(page_content='资料来源：灼识咨询、阿里云，中航证券研究所  1.2 大模型的技术原理：以“大规模预训练 +微调”范式满足多元化需求', metadata={'source': 'LLM.pdf', 'page': 5}), Document(page_content='4.1 AI 大模型行业预判\\\\n资料来源：中 航证券研究所  图表44：AI大模型行业预判\\\\n大模型作为 工业革命级的生产力工具 ，赋能产业数字化\\\\n大模型迭代升级，多模态化是大势所趋', metadata={'source': 'LLM.pdf', 'page': 36})]\\n\\nQuestion: 大模型的未来\\n<|im_end|>\\n<|im_start|>assistant\\n\", sampling_params: SamplingParams(n=1, best_of=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.7, top_p=1.0, top_k=-1, min_p=0.0, seed=None, use_beam_search=False, length_penalty=1.0, early_stopping=False, stop=['<|im_end|>'], stop_token_ids=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=7773, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None), prompt_token_ids: [151644, 8948, 198, 2610, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 271, 16141, 279, 3405, 3118, 1172, 389, 279, 2701, 2266, 1447, 58, 7524, 12024, 7495, 1131, 26288, 6567, 44401, 4891, 23781, 26853, 107, 220, 23031, 88940, 104, 220, 94299, 74577, 114, 40666, 100, 54955, 226, 6567, 44401, 1699, 29490, 57218, 107692, 220, 5373, 99524, 8908, 223, 242, 10236, 121, 239, 10236, 255, 231, 34369, 114, 1699, 42411, 99361, 100374, 41175, 80942, 59053, 249, 53599, 108, 95522, 242, 220, 11622, 1699, 18493, 99346, 9370, 101284, 18137, 95, 228, 4891, 253, 253, 59133, 59, 77, 26288, 6567, 44401, 4891, 23781, 26853, 224, 47685, 8908, 114, 227, 32181, 229, 68294, 122, 220, 53356, 10236, 118, 100, 220, 13343, 41175, 516, 11160, 12854, 2427, 1210, 364, 4086, 44, 15995, 516, 364, 2893, 1210, 220, 20, 38842, 11789, 12024, 7495, 44060, 1704, 15, 22, 15, 26288, 104949, 100353, 5122, 99892, 42140, 27442, 109369, 3837, 99404, 100168, 100560, 100287, 1773, 42344, 16, 7552, 43815, 105316, 5122, 6567, 233, 98, 18830, 105600, 33071, 5373, 99896, 33071, 42140, 53772, 35243, 5373, 32665, 42140, 5373, 104034, 20074, 32757, 26288, 5373, 43959, 43815, 44636, 99178, 100407, 49567, 104363, 9370, 15235, 26288, 104949, 104989, 105550, 43815, 106596, 2073, 104285, 854, 516, 11160, 12854, 2427, 1210, 364, 4086, 44, 15995, 516, 364, 2893, 1210, 220, 16, 38842, 11789, 12024, 7495, 1131, 18, 13, 20, 80090, 123, 99560, 100143, 5122, 53599, 108, 23384, 104125, 3837, 100679, 101931, 3837, 23031, 99891, 20074, 100143, 26288, 104949, 100013, 516, 11160, 12854, 2427, 1210, 364, 4086, 44, 15995, 516, 364, 2893, 1210, 220, 18, 19, 38842, 11789, 12024, 7495, 1131, 101111, 89161, 5122, 111294, 99334, 100703, 5373, 102661, 99718, 3837, 15946, 99440, 100719, 105601, 220, 220, 16, 13, 17, 40666, 100, 104949, 105535, 105318, 5122, 23031, 2073, 105483, 98841, 104034, 488, 48934, 47872, 854, 99453, 28330, 101929, 106154, 100354, 516, 11160, 12854, 2427, 1210, 364, 4086, 44, 15995, 516, 364, 2893, 1210, 220, 20, 38842, 11789, 12024, 7495, 1131, 19, 13, 16, 15235, 40666, 100, 104949, 99717, 98841, 99713, 1699, 101111, 89161, 5122, 15946, 8908, 230, 103, 100719, 105601, 220, 90867, 20742, 19, 19, 5122, 15469, 26288, 104949, 99717, 98841, 99713, 1699, 26288, 104949, 100622, 83002, 98, 40952, 104073, 52334, 9370, 109650, 102011, 41175, 107096, 99404, 104721, 1699, 26288, 104949, 113862, 100402, 3837, 42140, 53772, 35243, 32108, 20412, 26288, 99272, 31838, 100083, 516, 11160, 12854, 2427, 1210, 364, 4086, 44, 15995, 516, 364, 2893, 1210, 220, 18, 21, 5410, 2533, 14582, 25, 40666, 100, 104949, 9370, 100353, 198, 151645, 198, 151644, 77091, 198], lora_request: None.\nINFO 05-09 18:36:38 metrics.py:334] Avg prompt throughput: 53.7 tokens/s, Avg generation throughput: 0.1 tokens/s, Running: 1 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 4.8%, CPU KV cache usage: 0.0%\nINFO 05-09 18:36:41 async_llm_engine.py:120] Finished request cmpl-e27203f9726240c496ee5d3aa298315d.\nINFO:     127.0.0.1:33052 - \"POST /v1/chat/completions HTTP/1.1\" 200 OK\n————————————————————————————————————————————————————————————————————————————————————\nrag_py—stdout: query:大模型的未来将表现为应用多点开花，产业智能跃迁。大模型将拥有通用性、基础性多模态、参数多、训练数据量大、生成内容高质稳定等特征，成为自动化内容生产的“工厂”。同时，大模型也会受益于地方鼓励、中央规范的政策支持和科学数据的支持。大模型的技术原理以“大规模预训练+微调”范式满足多元化需求。大模型也将迭代升级，多模态化是大势所趋。\n\n————————————————————————————————————————————————————————————————————————————————————\nrag_py—stderr: 2024-05-09 18:36:30,792 - modelscope - INFO - PyTorch version 2.3.0 Found.\n2024-05-09 18:36:30,794 - modelscope - INFO - TensorFlow version 2.15.0 Found.\n2024-05-09 18:36:30,794 - modelscope - INFO - Loading ast index from /root/.cache/modelscope/ast_indexer\n2024-05-09 18:36:30,942 - modelscope - INFO - Loading done! Current index file version is 1.14.0, with md5 9b69058f7ef1df5bf58f0d94dff00023 and a total number of 976 components indexed\n2024-05-09 18:36:35,967 - modelscope - WARNING - Model revision not specified, use revision: v1.1.0\n2024-05-09 18:36:36,410 - modelscope - INFO - initiate model from /root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base\n2024-05-09 18:36:36,410 - modelscope - INFO - initiate model from location /root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base.\n2024-05-09 18:36:36,411 - modelscope - INFO - initialize model from /root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base\n2024-05-09 18:36:37,594 - modelscope - WARNING - No preprocessor field found in cfg.\n2024-05-09 18:36:37,594 - modelscope - WARNING - No val key and type key found in preprocessor domain of configuration.json file.\n2024-05-09 18:36:37,594 - modelscope - WARNING - Cannot find available config to build preprocessor at mode inference, current config: {'model_dir': '/root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base'}. trying to build by task and model information.\n2024-05-09 18:36:37,648 - modelscope - WARNING - No preprocessor field found in cfg.\n2024-05-09 18:36:37,648 - modelscope - WARNING - No val key and type key found in preprocessor domain of configuration.json file.\n2024-05-09 18:36:37,648 - modelscope - WARNING - Cannot find available config to build preprocessor at mode inference, current config: {'model_dir': '/root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base', 'sequence_length': 128}. trying to build by task and model information.\n/opt/conda/lib/python3.10/site-packages/langchain_core/utils/utils.py:161: UserWarning: WARNING! stop is not default parameter.\n                stop was transferred to model_kwargs.\n                Please confirm that stop is what you intended.\n  warnings.warn(\n/opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:1051: FutureWarning: The `device` argument is deprecated and will be removed in v5 of Transformers.\n  warnings.warn(\n\n————————————————————————————————————————————————————————————————————————————————————\nINFO 05-09 18:36:51 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 8.8 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:37:01 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:37:11 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:37:21 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:37:31 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:37:41 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:37:51 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:38:01 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\n","output_type":"stream"}]},{"cell_type":"code","source":"# 打开一个管道，用于向rag.py发送输入\nwith subprocess.Popen([\"python\", \"/kaggle/input/llms-2-5/rag.py\"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) as rag_process:\n    # 向rag.py发送输入\n    user_input = \"人工智能的发展史\"\n    stdout, stderr = rag_process.communicate(input=user_input)\n\n# 当rag.py执行完毕后，处理其输出\nprint(\"————————————————————————————————————————————————————————————————————————————————————\")\nprint(\"rag_py—stdout:\",stdout)\nprint(\"————————————————————————————————————————————————————————————————————————————————————\")\nprint(\"rag_py—stderr:\",stderr)\nprint(\"————————————————————————————————————————————————————————————————————————————————————\")","metadata":{"execution":{"iopub.status.busy":"2024-05-09T18:38:04.026280Z","iopub.execute_input":"2024-05-09T18:38:04.026628Z","iopub.status.idle":"2024-05-09T18:38:18.884816Z","shell.execute_reply.started":"2024-05-09T18:38:04.026601Z","shell.execute_reply":"2024-05-09T18:38:18.883793Z"},"trusted":true},"execution_count":16,"outputs":[{"name":"stdout","text":"INFO 05-09 18:38:11 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:38:15 async_llm_engine.py:529] Received request cmpl-291b65507bd94d75aaeed43bf91b3bb6: prompt: \"<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n<|im_start|>user\\n\\nAnswer the question based only on the following context:\\n\\n[Document(page_content='Statistical AISymbolic AI神经人工智能图表1：人工智能的发展历史1.1 大模型推动人工智能发展： ANI-AGI-ASI', metadata={'source': 'LLM.pdf', 'page': 3}), Document(page_content='资料来源：中国信通院、吉林大学学报、 Cornrell University 、《Attention Is All You Need 》，中航证券研究所整理\\\\uf070算法的迭代推动人工智能的发展 ：几十年来', metadata={'source': 'LLM.pdf', 'page': 3}), Document(page_content='➢神经（Neural ）AI：经历了 1980 s-2012 年神经网络 、2012 年后的深度学习两大阶段 ，深度神经网络和模型大小呈正相关 ，伴随着模型参数量的增加', metadata={'source': 'LLM.pdf', 'page': 3}), Document(page_content='机器学习1970 -1980s                     1985 至今 1980s -2012                         2017', metadata={'source': 'LLM.pdf', 'page': 3}), Document(page_content='：几十年来 ，AI领域持续探索 ，1940 -1980 s符号AI占主导 ，1980 年后，统计AI与神经 AI齐头并进 ，二者竞争发展至今 ，GPT系列属于神经 AI。', metadata={'source': 'LLM.pdf', 'page': 3})]\\n\\nQuestion: 人工智能的发展史\\n<|im_end|>\\n<|im_start|>assistant\\n\", sampling_params: SamplingParams(n=1, best_of=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.7, top_p=1.0, top_k=-1, min_p=0.0, seed=None, use_beam_search=False, length_penalty=1.0, early_stopping=False, stop=['<|im_end|>'], stop_token_ids=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=7831, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None), prompt_token_ids: [151644, 8948, 198, 2610, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 271, 16141, 279, 3405, 3118, 1172, 389, 279, 2701, 2266, 1447, 58, 7524, 12024, 7495, 1131, 15878, 39496, 94100, 4001, 292, 15235, 102398, 104455, 116996, 16, 5122, 104455, 103949, 100022, 16, 13, 16, 40666, 100, 104949, 101890, 104455, 99185, 5122, 2100, 40, 12, 1890, 40, 12, 84502, 516, 11160, 12854, 2427, 1210, 364, 4086, 44, 15995, 516, 364, 2893, 1210, 220, 18, 38842, 11789, 12024, 7495, 1131, 101111, 89161, 5122, 58695, 21317, 31935, 93823, 5373, 102719, 99562, 47764, 42278, 5373, 21330, 16285, 3822, 220, 5373, 26940, 69329, 2160, 2009, 1446, 14656, 220, 87243, 15946, 99440, 100719, 105601, 104387, 59, 1704, 15, 22, 15, 107018, 9370, 113862, 101890, 104455, 103949, 30372, 109238, 36407, 516, 11160, 12854, 2427, 1210, 364, 4086, 44, 15995, 516, 364, 2893, 1210, 220, 18, 38842, 11789, 12024, 7495, 1131, 145182, 102398, 9909, 8813, 4176, 220, 7552, 15469, 5122, 106014, 220, 16, 24, 23, 15, 274, 12, 17, 15, 16, 17, 74577, 112, 102398, 71356, 220, 5373, 17, 15, 16, 17, 74577, 112, 104813, 102217, 100134, 105736, 100385, 41175, 102217, 102398, 71356, 33108, 104949, 92032, 100827, 36556, 78556, 41175, 107689, 104949, 32665, 32757, 9370, 100649, 516, 11160, 12854, 2427, 1210, 364, 4086, 44, 15995, 516, 364, 2893, 1210, 220, 18, 38842, 11789, 12024, 7495, 1131, 102182, 100134, 16, 24, 22, 15, 481, 16, 24, 23, 15, 82, 3824, 220, 16, 24, 23, 20, 58464, 111, 36171, 220, 16, 24, 23, 15, 82, 481, 17, 15, 16, 17, 5108, 220, 17, 15, 16, 22, 516, 11160, 12854, 2427, 1210, 364, 4086, 44, 15995, 516, 364, 2893, 1210, 220, 18, 38842, 11789, 12024, 7495, 1131, 5122, 109238, 36407, 41175, 15469, 100650, 100652, 101964, 41175, 16, 24, 19, 15, 481, 16, 24, 23, 15, 274, 108872, 15469, 99571, 105289, 41175, 16, 24, 23, 15, 74577, 112, 33447, 3837, 100787, 15469, 57218, 102398, 15235, 100046, 64355, 62926, 41299, 41175, 110566, 100218, 99185, 104411, 41175, 38, 2828, 100187, 100409, 102398, 15235, 1773, 516, 11160, 12854, 2427, 1210, 364, 4086, 44, 15995, 516, 364, 2893, 1210, 220, 18, 5410, 2533, 14582, 25, 220, 104455, 103949, 99497, 198, 151645, 198, 151644, 77091, 198], lora_request: None.\nINFO 05-09 18:38:16 metrics.py:334] Avg prompt throughput: 72.1 tokens/s, Avg generation throughput: 3.0 tokens/s, Running: 1 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 4.3%, CPU KV cache usage: 0.0%\nINFO 05-09 18:38:17 async_llm_engine.py:120] Finished request cmpl-291b65507bd94d75aaeed43bf91b3bb6.\nINFO:     127.0.0.1:35132 - \"POST /v1/chat/completions HTTP/1.1\" 200 OK\n————————————————————————————————————————————————————————————————————————————————————\nrag_py—stdout: query:人工智能经历了符号AI和统计AI齐头并进的发展历史。自1940年代至今，AI领域一直在持续探索和发展。2017年以来，深度学习和神经AI取得了显著进展，推动了AI的发展。通过大模型的迭代，神经AI经历了神经网络和深度学习两大阶段，深度神经网络和模型大小呈正相关，伴随着模型参数量的增加。\n\n————————————————————————————————————————————————————————————————————————————————————\nrag_py—stderr: 2024-05-09 18:38:07,556 - modelscope - INFO - PyTorch version 2.3.0 Found.\n2024-05-09 18:38:07,559 - modelscope - INFO - TensorFlow version 2.15.0 Found.\n2024-05-09 18:38:07,559 - modelscope - INFO - Loading ast index from /root/.cache/modelscope/ast_indexer\n2024-05-09 18:38:07,709 - modelscope - INFO - Loading done! Current index file version is 1.14.0, with md5 9b69058f7ef1df5bf58f0d94dff00023 and a total number of 976 components indexed\n2024-05-09 18:38:13,000 - modelscope - WARNING - Model revision not specified, use revision: v1.1.0\n2024-05-09 18:38:13,341 - modelscope - INFO - initiate model from /root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base\n2024-05-09 18:38:13,341 - modelscope - INFO - initiate model from location /root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base.\n2024-05-09 18:38:13,343 - modelscope - INFO - initialize model from /root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base\n2024-05-09 18:38:14,478 - modelscope - WARNING - No preprocessor field found in cfg.\n2024-05-09 18:38:14,478 - modelscope - WARNING - No val key and type key found in preprocessor domain of configuration.json file.\n2024-05-09 18:38:14,478 - modelscope - WARNING - Cannot find available config to build preprocessor at mode inference, current config: {'model_dir': '/root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base'}. trying to build by task and model information.\n2024-05-09 18:38:14,532 - modelscope - WARNING - No preprocessor field found in cfg.\n2024-05-09 18:38:14,532 - modelscope - WARNING - No val key and type key found in preprocessor domain of configuration.json file.\n2024-05-09 18:38:14,532 - modelscope - WARNING - Cannot find available config to build preprocessor at mode inference, current config: {'model_dir': '/root/.cache/modelscope/hub/iic/nlp_corom_sentence-embedding_chinese-base', 'sequence_length': 128}. trying to build by task and model information.\n/opt/conda/lib/python3.10/site-packages/langchain_core/utils/utils.py:161: UserWarning: WARNING! stop is not default parameter.\n                stop was transferred to model_kwargs.\n                Please confirm that stop is what you intended.\n  warnings.warn(\n/opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:1051: FutureWarning: The `device` argument is deprecated and will be removed in v5 of Transformers.\n  warnings.warn(\n\n————————————————————————————————————————————————————————————————————————————————————\nINFO 05-09 18:38:31 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 4.6 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:38:41 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:38:51 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:39:01 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:39:11 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:39:21 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:39:31 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-09 18:39:41 metrics.py:334] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\n","output_type":"stream"}]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}